package module4

import module4.udfs.{InitialsUDF, InterestCapitalizationUDF}
import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.functions.{call_udf, col, lit, when}
import org.apache.spark.sql.types.DataTypes

/**
 * Small demonstrations of Spark SQL user-defined functions and of `Dataset.transform`.
 *
 * Each public demo obtains a local [[SparkSession]] through `getOrCreate()`, builds a
 * DataFrame, adds derived columns and prints the result with `show()`.
 */
class UdfOperations {

  /** Name under which [[InitialsUDF]] is registered in the session's UDF registry. */
  private val InitialsUdfName: String = "initialsUDF"

  /** Name under which [[InterestCapitalizationUDF]] is registered in the session's UDF registry. */
  private val InterestCapitalizationUdfName: String = "interestCapitalizationUDF"

  /** Header-bearing CSV read by [[capitalizationMoney]]; it must provide `money` and `interest` columns. */
  private val MoneySavingCsvPath: String = "money_saving.csv"

  /** Year counts passed to the capitalization UDF; each one yields a `<n>years` column. */
  private val CapitalizationHorizons: Seq[Int] = Seq(10, 20, 40, 60)

  /** In-memory sample rows: (id, firstName, lastName, age). */
  private val SamplePeople: Seq[(String, String, String, Int)] = Seq(
    ("1", "marek", "czuma", 28), ("2", "ania", "kowalska", 30), ("3", "magda", "nowak", 28),
    ("4", "jan", "kowalski", 15), ("5", "jozef", "czuma", 25), ("6", "ignacy", "czuma", 35),
    ("7", "laura", "moscicka", 68), ("8", "zuzanna", "birecka", 12), ("9", "roman", "kowalski", 45),
    ("10", "marek", "kowalski", 68), ("11", "ignacy", "nowak", 43), ("12", "ania", "nowak", 33),
    ("13", "laura", "czuma", 6), ("14", "karol", "birecki", 21), ("15", "karol", "nowak", 43),
    ("16", "jan", "moscicki", 33), ("17", "jan", "birecki", 36), ("18", "andrzej", "kowalski", 82)
  )

  /** Returns the shared local session, creating it on first use. */
  private def localSession(): SparkSession =
    SparkSession.builder()
      .appName("fundament-sparka")
      .master("local")
      .getOrCreate()

  /** Builds the sample people DataFrame with columns `id`, `firstName`, `lastName`, `age`. */
  private def samplePeopleDF(spark: SparkSession): Dataset[Row] = {
    // The implicits supply the Seq-to-DataFrame conversion that provides `toDF`.
    import spark.implicits._
    SamplePeople.toDF("id", "firstName", "lastName", "age")
  }

  /**
   * Registers [[InitialsUDF]] with a string return type, applies it to the `firstName` and
   * `lastName` columns of the sample people and prints the result, including the new
   * `initials` column.
   */
  def initials(): Unit = {
    val spark: SparkSession = localSession()

    spark.udf.register(InitialsUdfName, new InitialsUDF(), DataTypes.StringType)

    val peopleWithInitials: Dataset[Row] = samplePeopleDF(spark)
      .withColumn("initials", call_udf(InitialsUdfName, col("firstName"), col("lastName")))

    peopleWithInitials.show()
  }

  /**
   * Registers [[InterestCapitalizationUDF]] with a double return type, reads the money-saving
   * CSV, casts `money` to long and `interest` to integer, and prints the data with one extra
   * column per horizon (`10years`, `20years`, `40years`, `60years`).
   *
   * The UDF is invoked as `(money, years, interest)`; what it computes is defined by
   * [[InterestCapitalizationUDF]] itself.
   */
  def capitalizationMoney(): Unit = {
    val spark: SparkSession = localSession()

    spark.udf.register(
      InterestCapitalizationUdfName, new InterestCapitalizationUDF(), DataTypes.DoubleType)

    // Without an explicit schema every CSV column is read as a string, hence the casts.
    val savingsDF: Dataset[Row] = spark.read
      .option("header", "true")
      .csv(MoneySavingCsvPath)
      .withColumn("money", col("money").cast(DataTypes.LongType))
      .withColumn("interest", col("interest").cast(DataTypes.IntegerType))

    // Append the horizon columns in order, one UDF call per year count.
    val peopleWithMoneyDF: Dataset[Row] = CapitalizationHorizons.foldLeft(savingsDF) {
      (df, years) =>
        df.withColumn(
          s"${years}years",
          call_udf(InterestCapitalizationUdfName, col("money"), lit(years), col("interest")))
    }

    peopleWithMoneyDF.show()
  }

  /** Prints the sample people together with the `isAdult` flag added by [[isAdult]]. */
  def checkingIsAdult(): Unit = {
    val spark: SparkSession = localSession()

    val peopleWithAdultsDF: Dataset[Row] = samplePeopleDF(spark).transform(isAdult)

    peopleWithAdultsDF.show()
  }

  /**
   * Adds an `isAdult` column holding `"T"` when `age` is at least 18 and `"F"` otherwise.
   *
   * @param df input rows; must contain an `age` column
   * @return `df` with the additional `isAdult` column
   */
  def isAdult(df: Dataset[Row]): Dataset[Row] =
    df.withColumn("isAdult", when(col("age").geq(18), "T").otherwise("F"))
}
